---
title: 'Replication materials for "Islam and Mass Preferences towards Foreign Direct Investment in Tunisia"'
author: "`r Sys.Date()`"
knit: (function(input_file, encoding, ...) {rmarkdown::render(input_file, encoding=encoding, output_dir=getwd())}) # wut
output:
  pdf_document:
    keep_tex: yes
    toc: true
    toc_depth: 2
    includes:
      in_header: baser/base.tex
    fig_caption: true
geometry: margin=1in
subparagraph: true
linkcolor: blue
fontfamily: fourier
vignette: >
  %\VignetteIndexEntry{jepsr}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: inline
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(out.extra  = '',
                      fig.pos    = "!b",
                      dev        = 'png',
                      dpi        = 300,
                      fig.height = 3.5,
                      echo       = FALSE,
                      message    = FALSE, 
                      warning    = FALSE,
                      include    = FALSE,
                      eval       = TRUE,
                      progress   = FALSE,
                      results    = 'asis')
alpha_max  <- 1/8 # goodness, what even is this?
state_size <- "footnotesize"
```

####### <!-- Data loading and processing -->

```{r source}
# provides the function `sumer` for enhanced tidy model summaries
invisible(sapply(dir(here::here("vignettes", "data", "wickr", "R"), full.names = TRUE), source))

# provides the function `galaxy` for making regression tables
source(here::here("vignettes", "data", "galaxy", "R", "galaxy.R"))
```

```{r ingest_cj, eval=FALSE}
# this block imports the full raw conjoint data set and exports the data file used for the analysis
# this block is not to be run by the replicator

# load the raw data (full file is not provided)
cj_z <- haven::read_sav(here::here("vignettes", "data", "conjoint-full-resaved.sav"))

# convert spss labelled numeric data to factor
cj_a <- dplyr::mutate(cj_z, dplyr::across(tidyselect:::where(haven::is.labelled), haven::as_factor))

# mark non-substantive responses (whose text includes "don't read" or "Don't read") as missing
cj_a <- dplyr::mutate(cj_a, dplyr::across(tidyselect:::where(is.factor), function(x) {
  `levels<-`(x, ifelse(stringr::str_detect(levels(x), "on't read"), NA, levels(x)))
}))

# select only the columns that are used in the analysis
cj_a <- dplyr::group_by(cj_a, .data$ResponseId) 
cj_a <- dplyr::select(cj_a,
                      .data$Relig2,  # religiosity
                      .data$Setup1b, # gender
                      .data$Demo1,   # age
                      .data$Emp1,    # employment status
                      .data$Inc1,    # income    (for comparison to AB4 only -- not modeled)
                      .data$Demo10,  # education (for comparison to AB4 only -- not modeled)
                      .data$Demo4,   # urbanity  (for comparison to AB4 only -- not modeled)
                      tidyselect::matches("^cj.+(a|b)"), # outcome variables
                      # treatment aspects
                      tidyselect::matches("(nationality|benefactor|industry|urbanrural|skill|wadu)\\d+$"))

# save the data file used in the analysis
saveRDS(cj_a, here::here("vignettes", "data", "cj_a.rds"))
```
```{r reload_cj}
# load the data file used in the conjoint analysis
cj_a <- readRDS(here::here("vignettes", "data", "cj_a.rds"))
```
```{r make_x_cj}
# this block makes the covariates and the subsetting variables
# then subsets the data as appropriate

# select the columns the covariates and subsets are made of
cj_x <- dplyr::select(cj_a, 
                      Religiosity = .data$Relig2, 
                      Gender      = .data$Setup1b, 
                      Age         = .data$Demo1,
                      Employ      = .data$Emp1,
                      Income      = .data$Inc1,
                      Education   = .data$Demo10,
                      Urbanity    = .data$Demo4)
cj_x <- dplyr::group_by(cj_x)

# shorten label names for religiosity
cj_x <- dplyr::mutate(cj_x, Religiosity = `levels<-`(.data$Religiosity, stringr::str_replace(levels(.data$Religiosity), " religious", "")))

# create variables for the conditional analyses by employment in the under-40 subset
cj_x <- dplyr::mutate(cj_x, 
                      Emp = factor(!(.data$Employ == "Unemployed")), # TRUE if not unemployed
                      U40 = .data$Age < 40,                          # TRUE if under 40
                      y_m = .data$U40 & (.data$Gender == "Male"),    # TRUE if under 40 AND male
                      All = TRUE)

# set aside for balance comparison with Arab Barometer 4 
cj_x_bak <- cj_x

# in the analysis set, retain only columns used in the analysis
cj_x <- dplyr::select(cj_x,
                      -.data$Income,
                      -.data$Education,
                      -.data$Urbanity)

# pivot the respondent-varying covars for conditional analysis into one column
cj_x <- tidyr::pivot_longer(cj_x, c("Religiosity", "Gender", "Emp"), "covar",  values_to = "x")

# pivot the different subset indicators into one column
cj_x <- tidyr::pivot_longer(cj_x, c("All", "U40", "y_m"),            "subset", values_to = "OK")

# drop rows not included in the subset for each analysis
cj_x <- dplyr::filter(cj_x, .data$OK)

# drop data for analyses not performed (by employment on the whole data set, or by other covars on the subset)
cj_x <- dplyr::filter(cj_x, (.data$covar == "Emp" & .data$subset != "All") |
                            (.data$covar != "Emp" & .data$subset == "All")) 
```
```{r make_y_cj}
# this block prepares the rating and choice response variables
# the raw ratings for investments 1 and 2 each round (N from 1 to 5) are from 1 to 7, where 1 is best
# the raw choice in each round is a categorical variable indicating investment A or B
# the prepared variables are both numeric where higher values indicate more support

# select the variables that record responses
cj_y <- dplyr::select(cj_a, tidyselect::matches("^cj.+(a|b)"))

# convert y variables to numeric
cj_y <- dplyr::mutate(cj_y, dplyr::across(tidyselect::everything(), as.numeric))

# reverse-code investment rating
cj_y <- dplyr::mutate(cj_y, dplyr::across(tidyselect::matches("^cj.+a"), `-`, y = 8))
cj_y <- dplyr::mutate(cj_y, dplyr::across(tidyselect::matches("^cj.+a"), `-`))

# one-hot encode investment choice
cj_y <- tidyr::pivot_longer(cj_y, tidyselect::matches("^cj.+b"), "Choice", values_to = "choice")
cj_y <- dplyr::mutate(cj_y, i = 1)
cj_y <- tidyr::pivot_wider(cj_y, names_from = c("Choice", "choice"), values_from = "i", values_fill = 0)
cj_y <- dplyr::select(cj_y, -tidyselect::matches("^cj.+b_NA"))

# pivot dependent variables into a single value column with an ID column for which response
cj_y <- tidyr::pivot_longer(cj_y, tidyselect::matches("^cj.+(a|b)"), "response")

# parse the info in the ID column to pick out round (1-5), investment option (1-2), and response type (rating/choice)
cj_y <- dplyr::mutate(cj_y,
                      round    = as.numeric(stringr::str_sub(.data$response,  3,  3)),
                      invest   = as.numeric(stringr::str_sub(.data$response, -1, -1)),
                      response = factor(stringr::str_sub(.data$response, 4, 4), labels = c("rating", "choice")))

# drop choice where missing in the raw data
cj_y <- dplyr::group_by(cj_y, .data$ResponseId, .data$round, .data$response)
cj_y <- dplyr::mutate(cj_y, sweep = all(.data$response == "choice" & .data$value == 0))
cj_y <- dplyr::filter(cj_y, !.data$sweep)
```
```{r make_t_cj}
# make treatment variables

# select the columns that record the six aspects of each conjoint treatment
cj_t <- dplyr::select(cj_a,       tidyselect::matches("(nationality|benefactor|industry|urbanrural|skill|wadu)\\d+$"))

# pivot the treatment aspects into a single column
cj_t <- tidyr::pivot_longer(cj_t, tidyselect::matches("(nationality|benefactor|industry|urbanrural|skill|wadu)\\d+$"))

# parse item number (1-10) and treatment aspect (nationality, etc.) from the name column
cj_t <- dplyr::mutate(cj_t,
                      item = stringr::str_extract(.data$name, "\\d+$"),
                      name = stringr::str_remove(.data$name,  "\\d+$"))

# parse round (1-5) and investment number (1-2) from the item column
cj_t <- dplyr::mutate(cj_t,
                      item   = as.numeric(.data$item),
                      round  = (1 + .data$item) %/% 2,
                      invest =  2 - (.data$item %%  2))

# pivot the six treatment aspects back into separate columns
cj_t <- tidyr::pivot_wider(cj_t, names_from = "name", values_from = "value")

# replace values that are empty strings with NA
cj_t <- dplyr::group_by(cj_t)
cj_t <- dplyr::mutate(cj_t, dplyr::across(tidyselect:::where(is.character), stringr::str_replace, pattern = "^$", replacement = "NA"))

# drop one particular ResponseId that has no data at all
cj_t <- dplyr::filter(cj_t, !.data$nationality == "NA")

# provide short versions of the treatment levels so they fit on the graphs
# and choose the _baseline_ levels used in the final version of the main analyses
# (i.e. jobs for men, instead of NA, is the "control" benefactor value against which effects of other values are compared)
cj_t <- dplyr::mutate(cj_t,
                      nationality = factor(.data$nationality, levels = c("American", setdiff(unique(.data$nationality), "American"))),
                      benefactor  = factor(.data$benefactor,
                                           levels = c("for men", setdiff(sort(unique(.data$benefactor)), "for men")),
                                           labels = c("men", "w/o hijab", "w/ hijab", "incl. w/ hijab", "NA")),
                      industry    = factor(stringr::str_remove(.data$industry, "^in "),
                                           levels = c("call centers",
                                                      setdiff(sort(unique(stringr::str_remove(.data$industry, "^in "))), "call centers"))),
                      rural       = factor(.data$urbanrural, levels = c("rural", "urban")),
                      skill       = factor(.data$skill,
                                           levels = c("These jobs require workers to be highly educated and skilled.",
                                                      "These jobs do not require much education or skills.", "NA"),
                                           labels = c("high skill", "low skill", "NA")),
                      wadu        = factor(.data$wadu,
                                           levels = c("They will have no wadu areas or prayer rooms in their facilities.",
                                                      "They will have wadu areas and prayer rooms in their facilities.", "NA"),
                                           labels = c("no facilities", "facilities", "NA")))
```
```{r join_cj}
# put the processed data back together
# and do final processing required for the modeling function used

# join the frames of treatment, outcome, and covariate data on ResponseId
cj   <- dplyr::left_join(cj_t, cj_y)
cj   <- dplyr::left_join(cj,   cj_x)

# drop any rows with no response or no covariate
cj   <- dplyr::filter(cj,
                      !is.na(.data$value),
                      !is.na(.data$x))

# convert character variables to factors
cj   <- dplyr::mutate(cj, dplyr::across(tidyselect:::where(is.character), factor))

# create a _nested_ data frame of data by analysis specification (one choice of covariate, data subset, and outcome)
cj   <- dplyr::group_by(cj, .data$covar, .data$subset, .data$response)
cj   <- tidyr::nest(cj)

# convert the respondent-varying covariate in the data for each spec to a factor (separately)
cj   <- dplyr::group_modify(cj, function(.x, .y) {
  dplyr::tibble(data = list(dplyr::mutate(dplyr::first(.x$data), x = factor(.data$x))))
})
```

```{r ingest_sv, eval=FALSE}
# this block loads _raw_ social vignettes data, and exports the data file used in the analysis
# this block is not to be run in reproduction

# load the complete raw SPSS file of data (file not included)
the_data <- haven::read_sav(here::here("vignettes", "data", "social-full-resaved.sav"))

# convert from foreign SPSS data type as represented in R to native R factor
the_data <- dplyr::mutate(the_data, dplyr::across(tidyselect:::where(haven::is.labelled), haven::as_factor))

# retain only the columns to be used
the_data <- dplyr::group_by(the_data, .data$ResponseId)
the_data <- dplyr::select(the_data,
                          .data$social_treatment,
                          .data$Setup1b,
                          .data$Demo1,
                          .data$Demo4,
                          .data$Demo10,
                          .data$Inc1,
                          .data$Relig2,
                          .data$Vign1,
                          .data$Vign2_1)

# save the raw file used in the analysis
saveRDS(the_data, here::here("vignettes", "data", "the_data.rds"))
```
```{r reload_sv}
the_data <- readRDS(here::here("vignettes", "data", "the_data.rds"))
the_data <- dplyr::group_by(the_data)

# rename columns nicely
the_data <- dplyr::rename(the_data,
                          Frame       = .data$social_treatment,
                          Gender      = .data$Setup1b,
                          Age         = .data$Demo1,
                          Rural       = .data$Demo4,
                          Education   = .data$Demo10,
                          Income      = .data$Inc1,
                          Religiosity = .data$Relig2,
                          Support     = .data$Vign1,   # the support outcome
                          Rating      = .data$Vign2_1) # the rating  outcome

# put legible names on treatment levels
the_data <- dplyr::mutate(the_data,
                          # legible variables with legible names
                          Frame       = `levels<-`(factor(.data$Frame), 
                                                   paste(c("Control", "Exploitation", "Prayer", "Hijab", "Women"), "Frame")))

the_data_bak <- the_data # set aside a copy of unmodified data for balance evaluation

# transform to forms that will be used in modeling
the_data <- dplyr::mutate(the_data,
                          # education below, at, or above median education level in the data (secondary)
                          Education   = factor(sign(as.numeric(.data$Education) - median(as.numeric(.data$Education))),
                                               labels = c("Less", "Secondary", "BA or higher")),
                          # income above or not above median income level in the data
                          Income      = factor(as.numeric(.data$Income) > median(as.numeric(na.omit(.data$Income))),
                                               labels = c("Low Income", "High Income")))
```
```{r make_y_sv}
# transform the two ourcome vars to a single column

# wrap each outcome value in a list so their factor levels don't clash upon pivoting to a single column
the_data <- dplyr::group_by(the_data, .data$ResponseId)
the_data <- dplyr::mutate(the_data,
                          Support = list(.data$Support),
                          Rating  = list(.data$Rating))

# pivot to long format
the_data <- tidyr::pivot_longer(the_data, c("Support", "Rating"), "Y", values_to = "y")

# nest the data by outcome type
the_data <- dplyr::group_by(the_data, .data$Y)
the_data <- tidyr::nest(the_data)

# within each nested set, unwrap the outcomes
the_data <- dplyr::group_modify(the_data, function(.x, .y) {
  dplyr::tibble(data = list(tidyr::unnest(dplyr::first(.x$data), .data$y)))
})
the_data <- dplyr::mutate(the_data, OK = TRUE)
```

####### <!-- Modeling -->

```{r model_cj}
# cjoint::amce (https://cran.r-project.org/package=cjoint) finds average marginal component effects
AMCE <- dplyr::mutate(cj, amce = list(cjoint::amce(value ~ (nationality + benefactor + industry + rural + skill + wadu) * x, 
                                                   dplyr::first(.data$data),
                                                   respondent.varying = "x",
                                                   respondent.id      = "ResponseId")))
```
```{r model_sv}
# two kinds of models: OLS and ordinal
the_ways <- dplyr::tibble(ordinal = list(ordinal::clm), OLS = list(lm))
the_ways <- tidyr::pivot_longer(the_ways, tidyselect::everything(), "model", values_to="fn")
the_ways <- dplyr::mutate(the_ways, OK = TRUE)

# two formulas: bivariate and multivariate
the_form <- dplyr::tibble(basic = list(y ~ Frame),
                          full  = list(y ~ Frame + Religiosity + Gender + Age + Rural + Education + Income))
the_form <- tidyr::pivot_longer(the_form, tidyselect::everything(), "spec", values_to="form")
the_form <- dplyr::mutate(the_form, OK = TRUE)

# create a tibble where each row is a specification (an outcome, a model, and a formula)
the_ways <- dplyr::left_join(the_ways, the_form)
the_ways <- dplyr::left_join(the_ways, the_data)
the_ways <- dplyr::group_by(the_ways, .data$model, .data$fn, .data$spec, .data$form, .data$Y)

# convert the outcome variable in the rows for OLS models to numeric instead of factor
# note, the OLS y variable is thus on a scale of negative 7 to negative 1, where negative 1 is highest approval
the_ways <- dplyr::group_modify(the_ways, function(.x, .y) {
  dplyr::tibble(data = list(dplyr::mutate(dplyr::first(.x$data),
                                          y = factor(.data$y, levels = setdiff(levels(.data$y), "Decline to answer [Don't read]")))))
})
the_ways <- dplyr::group_modify(the_ways, function(.x, .y) {
  dplyr::tibble(data = list(dplyr::mutate(dplyr::first(.x$data), y = if(.y$model == "OLS") {
    -as.numeric(.data$y) + max(na.omit(as.numeric(.data$y))) + 1
  } else {
    factor(.data$y, levels = rev(levels(.data$y)))
  })))
})

the_ways <- dplyr::group_by(the_ways, .data$model, .data$spec, .data$Y)

# run the model for each row, on the data for each row, using the formula for each row
the_ways <- dplyr::mutate(the_ways, result = list(do.call(dplyr::first(.data$fn),
                                                          list(formula = dplyr::first(.data$form), data = dplyr::first(.data$data)))))
```


\clearpage

# README

```{r readme, include = TRUE}
readme <- readLines(here::here("README"))
readme <- stringr::str_replace_all(readme, "^```", "\\\\begin{verbatim}```")
readme <- stringr::str_replace_all(readme, ".+```$", "```\\\\end{verbatim}")
readme <- paste0(readme, "\n")
invisible(purrr::map(readme, cat))
```

\clearpage

# Paper

## Experimental Design I (Conjoint)

```{r plot_cj_choice, include = TRUE}
# for amce objects, gives a tibble of average effects and effect differences
cj_out <- sumer(dplyr::select(AMCE, .data$amce))

# the main effects are taken from the analysis by gender because gender isn't missing for any respondent
cj_out_main <- dplyr::filter(cj_out,
                             .data$response == "choice",
                             .data$covar    == "Gender",
                             .data$outcome  == "Average Effect")

ggplot2::ggplot(cj_out_main,
                ggplot2::aes(x     = .data$estimate,
                             xmax  = .data$estimate + 2 * .data$std.error,
                             xmin  = .data$estimate - 2 * .data$std.error,
                             color = .data$term,
                             y     = paste0(.data$level, " (vs. ", .data$baseline, ")"))) +
  ggplot2::xlab("Effect on zero-one scale") + ggplot2::ylab("") +
  ggplot2::ggtitle("Effect on choice of FDI") +
  ggplot2::facet_grid(rows = dplyr::vars(term), scales = "free", space = "free") +
  ggplot2::scale_color_discrete(guide = FALSE) +
  ggplot2::geom_vline(xintercept = 0) +
  ggplot2::geom_pointrange()
```

## Experimental Design II (Vignettes)

```{r tab_med_sv}
# wait, what actually is the median of each of the scales?
the_data <- dplyr::mutate(the_data, 
                          med_num = median(na.omit(as.numeric(dplyr::pull(dplyr::first(.data$data, .data$Y))))),
                          med_fac = factor(.data$med_num, 
                                           levels = seq_along(levels(dplyr::pull(dplyr::first(.data$data, .data$Y)))), 
                                           labels =           levels(dplyr::pull(dplyr::first(.data$data, .data$Y)))),
                          above   = mean(na.omit(as.numeric(dplyr::pull(dplyr::first(.data$data, .data$Y)))) <= .data$med_num))
```
```{r simulate_sv, eval = FALSE}
# this block implements statistical simulation (King, Tomz, and Wittenberg 2000)
# it is not required to run this block; a full set of simulation outputs is saved

n_b  <- 1000 # for production; comment out for diagnosis
# n_b  <- 2    # for diagnosis;  comment out for production

# select the models with the multivariate spec
full <- dplyr::filter(the_ways, .data$spec  == "full")
# full <- dplyr::filter(full,     .data$model == "ordinal")

# take 1000 random draws of the coefs
full <- dplyr::mutate(full, m_y = median(na.omit(as.numeric(dplyr::pull(dplyr::first(.data$data), .data$y)))))
full <- dplyr::mutate(full, b   = list(stats::coef(dplyr::first(.data$result))))
full <- dplyr::mutate(full, v   = list(stats::vcov(dplyr::first(.data$result))))
full <- dplyr::mutate(full, B   = list(mvtnorm::rmvnorm(n_b, dplyr::first(.data$b), dplyr::first(.data$v))))

# create separate copies of the data set with the treatment set to each of the separate levels for everyone
tmnt <- dplyr::mutate(full, tr  = list(dplyr::select(dplyr::first(.data$data), .data$Frame)))
tmnt <- dplyr::group_modify(tmnt, function(.x, .y) {dplyr::group_by(dplyr::first(.x$tr), .data$Frame)})
tmnt <- dplyr::group_by(tmnt)
tmnt <- dplyr::group_by(tmnt, dplyr::across(tidyselect::everything()))
tmnt <- dplyr::summarise(tmnt)

full <- dplyr::left_join(full, tmnt)
full <- dplyr::group_by(full, .data$model, .data$Y, .data$Frame)
full <- dplyr::group_modify(full, function(.x, .y) {
  dplyr::mutate(.x, data = list(dplyr::mutate(dplyr::first(.data$data), Frame = dplyr::first(.y$Frame))))
})

full <- dplyr::mutate(full, M   = list(stats::model.matrix(dplyr::first(.data$form), dplyr::first(.data$data))))

# simulation is actually done only for the ordinal models
ordo <- dplyr::filter(full, .data$model == "ordinal")

# compute simulated linear predictors
ordo <- dplyr::mutate(ordo, th  = list(dplyr::first(.data$B)[,   setdiff(colnames(dplyr::first(.data$B)),
                                                                         colnames(dplyr::first(.data$M)))]))
ordo <- dplyr::mutate(ordo, B   = list(dplyr::first(.data$B)[, intersect(colnames(dplyr::first(.data$B)),
                                                                         colnames(dplyr::first(.data$M)))]))
ordo <- dplyr::mutate(ordo, B   = list(cbind(c("(Intercept)" = 0), dplyr::first(.data$B)))) # ordinal model has no intercept term!
ordo <- dplyr::mutate(ordo, eta = list(dplyr::first(.data$M) %*% t(dplyr::first(.data$B))))
ordo <- dplyr::mutate(ordo, eta = list(t(dplyr::first(.data$eta))))

# regroup data
ordo <- dplyr::select(ordo, .data$th, .data$eta)
ordo <- tidyr::pivot_longer(ordo, tidyselect::any_of(c("th", "eta")))
ordo <- dplyr::group_by(ordo, .data$model, .data$Y, .data$Frame, .data$name)

ordo <- dplyr::mutate(ordo, value = list(dplyr::as_tibble(dplyr::first(.data$value))))
ordo <- dplyr::mutate(ordo, value = list(dplyr::mutate(dplyr::first(.data$value), i = dplyr::row_number())))
ordo <- dplyr::mutate(ordo, value = list(tidyr::pivot_longer(dplyr::first(.data$value),
                                                             !tidyselect::matches("^i$"),
                                                             dplyr::first(.data$name),
                                                             values_to = toupper(dplyr::first(.data$name)))))

ordo <- dplyr::summarise(ordo, value = .data$value) # drop the last grouping column
ordo <- dplyr::summarise(ordo, value = list(purrr::reduce(.data$value, dplyr::left_join)))

# compute _cumulative_ probabilities
ordo <- tidyr::unnest(ordo, .data$value)
ordo <- dplyr::mutate(ordo, mu = plogis(.data$ETA, .data$TH))

# compute _categorical_ probabilites
ordo <- dplyr::group_by(ordo, .data$model, .data$Y, .data$Frame, .data$i, .data$eta)
ordo <- dplyr::summarise(ordo, p = list(-diff(c(1, .data$mu, 0))), .groups = "keep")

# draw a random outcome given categorical probabilities
ordo <- dplyr::mutate(ordo, y = sample(seq_along(dplyr::first(.data$p)), 1, FALSE, dplyr::first(.data$p)))

# summarise fraction of simulated outcomes above threshold by simulated treatment
ordo <- dplyr::left_join(ordo, dplyr::select(full, .data$m_y))

door <- ordo

ordo <- dplyr::summarise(ordo, y = .data$y >= .data$m_y)
ordo <- dplyr::summarise(ordo, y = mean(.data$y))
ordo <- dplyr::summarise(ordo,
                         estimate = mean(.data$y),
                         lower    = quantile(.data$y, 0.025),
                         upper    = quantile(.data$y, 0.975))
```
```{r save_sv_ordinal, eval = FALSE}
# save the simulation output
# it is not required to run this block; a full set of simulation outputs is saved
# for consistency, do not run this block as it will overwrite the saved outputs
saveRDS(ordo, here::here("vignettes", "data", "ordo.rds"))
```
```{r load_sv_ordinal}
ordo <- readRDS(here::here("vignettes", "data", "ordo.rds"))
```
```{r plot_sv_ordinal, include = TRUE}
ordo <- dplyr::mutate(ordo, Frame = factor(.data$Frame, labels = stringr::str_remove(levels(.data$Frame), " Frame$")))
ordo <- dplyr::left_join(ordo, dplyr::select(the_data, .data$med_fac))
ordo <- dplyr::mutate(ordo, y = paste0(.data$Y, "\n(", .data$med_fac, " or stronger)"))

ggplot2::ggplot(ordo, ggplot2::aes(x     = .data$estimate, 
                                   xmax  = .data$upper, 
                                   xmin  = .data$lower, 
                                   y     = reorder(.data$Frame, dplyr::desc(.data$Frame)),
                                   color = .data$Frame)) +
  ggplot2::ylab("Frame") + ggplot2::xlab("Simulated percent approval") +
  ggplot2::ggtitle("Predicted percent approval by outcome and frame") +
  ggplot2::facet_grid(rows = dplyr::vars(y), as.table = FALSE) + 
  ggplot2::geom_pointrange()
```

#### Scale medians (footnote in text)

```{r tab_median_sv, include = TRUE}
the_median <- dplyr::select(the_data,   .data$med_fac, .data$above)
the_median <- dplyr::mutate(the_median, above = round(.data$above * 100, 1))
the_median <- dplyr::mutate(the_median, above = paste0(format(.data$above), "%"))
the_median <- dplyr::rename(the_median,
                            Scale         = .data$Y,
                            Median        = .data$med_fac,
                            "At or above" = .data$above)

knitr::kable(the_median)
```

\clearpage

# Appendix

## Experimental Design I (Conjoint)

### Additional main result

```{r plot_cj_rating, eval = TRUE, include = TRUE}
cj_out_alt <- dplyr::filter(cj_out,
                            .data$response == "rating",
                            .data$covar    == "Gender",
                            .data$outcome  == "Average Effect")

ggplot2::ggplot(cj_out_alt,
                ggplot2::aes(x     = .data$estimate,
                             xmax  = .data$estimate + 2 * .data$std.error,
                             xmin  = .data$estimate - 2 * .data$std.error,
                             color = .data$term,
                             y     = paste0(.data$level, " (vs. ", .data$baseline, ")"))) +
  ggplot2::xlab("Effect on seven-point scale") + ggplot2::ylab("") +
  ggplot2::ggtitle("Effect on rating of FDI") +
  ggplot2::facet_grid(rows = dplyr::vars(term), scales = "free", space = "free") +
  ggplot2::scale_color_discrete(guide = FALSE) +
  ggplot2::geom_vline(xintercept = 0) +
  ggplot2::geom_pointrange()
```

\clearpage

### Covariate analyses

```{r all, eval=TRUE}
# helper function
depack <- function(foo, baz, name, value) {
  baz <- dplyr::as_tibble(baz)
  baz <- tidyr::pivot_longer(baz, tidyselect::everything(), name, values_to = value)

  dplyr::left_join(foo, baz)
}

# this function takes second differences for _all_ possible pairs of levels of the respondent-varying covariate
# (but _only_ for each level of each _treatment_ aspect versus the default baseline level)
#
# this much work is required only for the religiosity analysis, since the religiosity variable has 3 levels
# for covariates with only two levels, the right info could just be extracted from the AMCE model object
#
# the original, accepted submission did this with statistical simulation (King, Tomz, and Wittenberg 2000)
# but the final version does it in exact math instead
second_diff <- function(x) {
  # much output wrangling -- the model object output by cjoint::amce has pretty nonstandard structure
  b <- x$cond.estimates     # coefs and SEs are stored in a list of 2-row matrices
  
  # convert these to a data frame in long format
  b <- lapply(b, list)                                              # now it is a list of lists of matrices
  b <- dplyr::as_tibble(b)                                          # now it is a nested tibble in wide form
  b <- tidyr::pivot_longer(b, tidyselect::everything(), "Term")     # now it is a nested tibble in long form
  b <- dplyr::filter(b, stringr::str_detect(.data$Term, ":"))       # retain only the interaction terms
  b <- dplyr::group_by(b, .data$Term)
  b <- dplyr::mutate(b, value = list(t(dplyr::first(.data$value)))) # now value is a list of 2-column matrices
  b <- dplyr::mutate(b, value = list(dplyr::as_tibble(dplyr::first(.data$value), rownames = "Level")))
  b <- tidyr::unnest(b, .data$value)                                # now b is just a data.frame
  b <- dplyr::select(b,                                             # drop the column of standard errors and rename
                     Level = .data$Level,
                     value = .data$`Conditional Estimate`)
  
  b <- dplyr::mutate(b,                                                # parse the mangled term and level names
                     term  = stringr::str_remove(.data$Term,  ":.*$"), 
                     group = stringr::str_remove(.data$Level, "^.*:"), # the second half of Level is the _covar_ level
                     Level = stringr::str_remove(.data$Level, ":.*$")) # the first half is the _treatment_ level
  b <- depack(b, x$user.levels, "Level", "level")                      # smash in readable versions of the treatment
  b <- dplyr::group_by(b, .data$Term, .data$term, .data$Level, .data$level)
  
  # staple in some placeholder values for the covar level used as a baseline when running the model
  b <- dplyr::summarise(b,
                        value = list(c(0,            .data$value)),
                        group = list(c("(Baseline)", .data$group)))
  b <- tidyr::unnest(b, .data$value, .data$group)
  
  B <- dplyr::rename(b, Group = .data$group, Value = .data$value) # make another copy
  b <- dplyr::left_join(b, B)                                     # now each row is a _pair_ of covar values
  b <- dplyr::filter(b, .data$group < .data$Group)                # keep only unique pairs
  b <- dplyr::group_by(b, .data$Term, .data$term, .data$Level, .data$level, .data$group, .data$Group)
  
  # at last, take the difference between conditional effects
  b <- dplyr::summarise(b, estimate = .data$value - .data$Value, .groups = "keep")
  
  # mangle the group names to match row and column names in the covariance matrix
  b <- dplyr::mutate(b, baseline = paste0(.data$Level, ":"))
  b <- dplyr::mutate(b, Level    = ifelse(.data$group == "(Baseline)", "", .data$baseline))
  b <- dplyr::mutate(b,
                     g1 = paste0(.data$Level,    .data$group),
                     g2 = paste0(.data$baseline, .data$Group))
  
  # add a placeholder row and column to the covariance matrix
  v <- cbind(`(Baseline)` = 0, rbind(`(Baseline)` = 0, x$vcov.resp)) 
  
  # compute the standard error of the difference by subsetting the covariance matrix
  b <- dplyr::mutate(b, std.error = sqrt(v[.data$g1, .data$g1] + v[.data$g2, .data$g2] - 2 * v[.data$g1, .data$g2]))
  
  # further wrangling for presentation
  b <- depack(b, x$user.levels, "group", "Group 1")
  b <- depack(b, x$user.levels, "Group", "Group 2")
  b <- dplyr::mutate(b, `Group 1` = ifelse(is.na(.data$`Group 1`), x$baselines[["x"]], .data$`Group 1`))
  b <- dplyr::group_by(b, .data$term, .data$level, .data$`Group 1`, .data$`Group 2`)
  b <- dplyr::select(b, .data$estimate, .data$std.error)
  
  # finally, compute z-statistic and p-value
  b <- dplyr::mutate(b,
                     statistic = .data$estimate / .data$std.error,
                     p.value   = 2 * pnorm(abs(.data$statistic), lower.tail = FALSE))
  
  b
}

amce_covar <- dplyr::filter(AMCE, .data$subset == "All")
amce_covar <- dplyr::mutate(amce_covar, result = list(second_diff(dplyr::first(.data$amce))))

cj_out_covar <- dplyr::select(amce_covar, .data$result)
cj_out_covar <- tidyr::unnest(cj_out_covar, .data$result)
cj_out_covar <- dplyr::group_by(cj_out_covar, .data$covar, .data$response)
cj_out_covar <- dplyr::select(cj_out_covar,
                              Attribute  = .data$term,
                              Level      = .data$level,
                              `Group 1`  = .data$`Group 1`,
                              `Group 2`  = .data$`Group 2`,
                              Difference = .data$estimate,
                              SE         = .data$std.error,
                              z          = .data$statistic,
                              p          = .data$p.value)

# retain for tabulation only results with p < 0.1
# examine the R object itself before running this line if inspecting all results is desired
# but the overall story is not many at all of these treatment aspects matter differently by respondent-varying covariate
cj_out_covar <- dplyr::filter(cj_out_covar, .data$p < 0.1)

# format numeric values and nest by analysis specification
cj_out_covar <- dplyr::mutate(cj_out_covar, dplyr::across(tidyselect:::where(is.numeric), round,  digits = 4))
cj_out_covar <- dplyr::mutate(cj_out_covar, dplyr::across(tidyselect:::where(is.numeric), format, digits = 4, sci = FALSE))
cj_out_covar <- tidyr::nest(cj_out_covar)

# create simple tables
cj_out_covar <- dplyr::mutate(cj_out_covar,
                              caption = paste0("Differences by ", dplyr::first(.data$covar),
                                               " in effect on ",  dplyr::first(.data$response)))
cj_out_covar <- dplyr::mutate(cj_out_covar, 
                              kable = list(pander::pandoc.table.return(dplyr::first(.data$data), 
                                                                       digits       = 4,
                                                                       justify      = "llllrrrr",
                                                                       # caption      = .data$caption,
                                                                       split.tables = Inf)))
cj_out_covar <- dplyr::mutate(cj_out_covar,
                              kable = paste0("\n\n", .data$caption, "\n\n", dplyr::first(.data$kable)))
```

#### Gender

```{r gender, include = TRUE}
cj_out_gend <- dplyr::filter(cj_out_covar, .data$covar == "Gender")
invisible(purrr::map(cj_out_gend$kable, cat))
```

#### Religiosity

```{r religiosity, include = TRUE}
cj_out_gend <- dplyr::filter(cj_out_covar, .data$covar == "Religiosity")
invisible(purrr::map(cj_out_gend$kable, cat))
```

\clearpage

#### Unemployment, among respondents under 40

```{r u40_all, include=TRUE}
amce_u40 <- dplyr::filter(AMCE, .data$covar == "Emp")
amce_u40 <- dplyr::group_by(amce_u40, .data$subset, .data$response)
amce_u40 <- dplyr::select(amce_u40, .data$amce)
amce_u40 <- dplyr::group_by(amce_u40, .data$subset)
amce_u40 <- tidyr::nest(amce_u40)

amce_u40 <- dplyr::mutate(amce_u40,
                          galaxy = list(galaxy(dplyr::first(.data$data),
                                               .sumer = sumer,
                                               .incol = "outcome",
                                               .float = FALSE,
                                               .modno = FALSE,
                                               .below = "n",
                                               .print = FALSE,
                                               .captn = "Average component effects and conditional differences by employment")))

cat(dplyr::first(amce_u40$galaxy))
```

\clearpage

#### Unemployment, among male respondents under 40

```{r u40_male, include = TRUE}
cat(dplyr::last(amce_u40$galaxy))
```

\clearpage

## Experimental Design II (Vignettes)

### OLS estimates

```{r ols_plot, include = TRUE}
# this block graphs marginal effect by treatment for the OLS models

# pick out the models to be included in the plot
the_regs <- dplyr::filter(the_ways, .data$model == "OLS", .data$spec == "full")

# margins = TRUE produces a summary of marginal effects via the `margins` package
the_regs <- sumer(dplyr::select(the_regs, .data$result), margins = TRUE)

the_regs <- dplyr::filter(the_regs, .data$term == "Frame")
the_regs <- dplyr::mutate(the_regs,
                          upper = .data$estimate + 2 * .data$std.error,
                          lower = .data$estimate - 2 * .data$std.error)

ggplot2::ggplot(the_regs, ggplot2::aes(y     = factor(.data$level, levels = rev(levels(factor(.data$level)))),
                                       x     = .data$estimate,
                                       xmax  = .data$upper,
                                       xmin  = .data$lower,
                                       color = .data$level)) +
  ggplot2::facet_grid(rows = dplyr::vars(Y)) + 
  ggplot2::scale_color_discrete(guide = FALSE) +
  ggplot2::ggtitle("Frame effects (OLS) on approval by outcome") +
  ggplot2::xlab("Unit shift in approval (Support: 1-4 scale; Rating: 1-7 scale)") + ggplot2::ylab("") +
  ggplot2::geom_vline(xintercept = 0) +
  ggplot2::geom_pointrange()
```

\clearpage

### Tables

```{r vig_tables}
# this block constructs (but does not display) regression tables for the vignettes
the_vigs <- dplyr::group_by(the_ways, .data$model)
the_vigs <- dplyr::mutate(the_vigs, spec = as.character(as.numeric(factor(.data$spec))))
the_vigs <- dplyr::select(the_vigs, .data$spec, .data$Y, .data$result)
the_vigs <- tidyr::nest(the_vigs)
the_vigs <- dplyr::mutate(the_vigs, galaxy = galaxy(dplyr::first(.data$data),
                                                    .print = FALSE,
                                                    .sumer = sumer,
                                                    .captn = paste("Effects on FDI approval,", .data$model, "model"),
                                                    .float = FALSE,
                                                    .below = c("n", "AIC"),
                                                    .tomit = "\\|",
                                                    .modno = FALSE,
                                                    .headr = FALSE))
```

#### Ordered Logit

```{r ordinal_table, include = TRUE}
cat(dplyr::first(dplyr::filter(the_vigs, .data$model == "ordinal")$galaxy))
```

\clearpage

#### OLS

```{r ols_table, include = TRUE}
cat(dplyr::first(dplyr::filter(the_vigs, .data$model == "OLS")$galaxy))
```

\clearpage

## Covariate Balance (versus Arab Barometer 4)

```{r covariate_balance, include = TRUE}
# Arab Barometer 4 final weights provided by the Arab Barometer team
# this csv has been transformed from the original Excel file (Combined Demographics Tunisia.xls)
ab4 <- readr::read_csv(here::here("vignettes", "data", "Combined Demographics Tunisia.csv"))
ab4 <- dplyr::mutate(ab4, value = stringr::str_replace(.data$value, "^DT 1\\d00.+$", "DT 1000-2000"))
ab4 <- dplyr::mutate(ab4, value = stringr::str_replace(.data$value, "0-", "1-"))
ab4 <- dplyr::mutate(ab4, value = stringr::str_replace(.data$value, "^Complete secondary$", "Completed secondary"))
ab4 <- dplyr::mutate(ab4, value = ifelse(.data$value %in% c("Don't know", "Refused"), NA, .data$value))
ab4 <- dplyr::group_by(ab4, .data$variable, .data$value)
ab4 <- dplyr::summarise(ab4, `AB4 Final Weight` = sum(.data$AB4_final_weight))

# pull out the columns of each data set that are in the AB4 summary
cj_x_ab4     <- dplyr::rename(cj_x_bak,     Employment = .data$Employ)
cj_x_ab4     <- dplyr::select(cj_x_ab4,     tidyselect::any_of(unique(dplyr::pull(ab4, .data$variable))))

the_data_ab4 <- dplyr::rename(the_data_bak, Urbanity   = .data$Rural)
the_data_ab4 <- dplyr::select(the_data_ab4, tidyselect::any_of(unique(dplyr::pull(ab4, .data$variable))))

# stack the data sets
comp <- dplyr::tibble(conjoint  = list(cj_x_ab4),
                      vignettes = list(the_data_ab4))
comp <- tidyr::pivot_longer(comp, .data$conjoint:.data$vignettes, "study")
comp <- tidyr::unnest(comp, .data$value)

# no better way to convert the levels of each of these in the studies to the levels in AB4 than manually
comp <- dplyr::mutate(comp,
                      Education  = stringr::str_replace(.data$Education, "Illiterate / no education", "Unschooled"),
                      Education  = stringr::str_replace(.data$Education, "Elementary",                "Completed elementary"),
                      Education  = stringr::str_replace(.data$Education, "Intermediate",              "Completed elementary"),
                      Education  = stringr::str_replace(.data$Education, "Secondary",                 "Completed secondary"),
                      Education  = stringr::str_replace(.data$Education, "BA degree",                 "College"),
                      Education  = stringr::str_replace(.data$Education, "MA degree or higher",       "College"),
                      Income     = stringr::str_replace(.data$Income,    "601 - 700 dinars",          "601 - 800 dinars"),
                      Income     = stringr::str_replace(.data$Income,    "701 - 800 dinars",          "601 - 800 dinars"),
                      Income     = stringr::str_replace(.data$Income,    "801 - 900 dinars",          "801 - 1000 dinars"),
                      Income     = stringr::str_replace(.data$Income,    "901 - 1000 dinars",         "801 - 1000 dinars"),
                      Income     = stringr::str_replace(.data$Income,    " - ",                       "-"),
                      Income     = stringr::str_replace(.data$Income,    " dinars",                   ""),
                      Income     = stringr::str_replace(.data$Income,    "^",                         "DT "),
                      Income     = stringr::str_replace(.data$Income,    "DT More than 2000",         "DT 2000+"),
                      Income     = stringr::str_replace(.data$Income,    "DT 0-100",                  "Less than DT 100"),
                      Age        = factor(rowSums(outer(.data$Age, c(17, 24, 34, 44, 54), `>`)),
                                          levels = 0:5,
                                          labels = c("0-17", sort(dplyr::filter(ab4, .data$variable == "Age")$value))),
                      Employment = factor(.data$Employment, levels = c("Employed", "Unemployed")))

comp <- tidyr::pivot_longer(comp, -.data$study, "variable")
comp <- dplyr::group_by(comp, .data$study, .data$variable, .data$value)
comp <- dplyr::summarise(comp, pct = dplyr::n())
comp <- dplyr::mutate(comp, pct = .data$pct / sum(.data$pct))
comp <- dplyr::mutate(comp, value = ifelse(is.na(.data$value), "NA", .data$value))
comp <- tidyr::pivot_wider(comp, names_from = .data$study, values_from = .data$pct)

comp <- dplyr::left_join(comp, ab4)
comp <- dplyr::group_by(comp, .data$variable)
comp <- tidyr::nest(comp)
comp <- dplyr::group_modify(comp, function(.x, .y) {
  dplyr::tibble(data = if(.y$variable == "Income") {
    z <- dplyr::first(.x$data)
    z <- dplyr::arrange(z,  as.numeric(stringr::str_extract(.data$value, "\\d+")))
    
    list(z)
  } else {.x$data})
})
comp <- dplyr::group_modify(comp, function(.x, .y) {
  dplyr::tibble(data = if(.y$variable == "Education") {
    z <- dplyr::first(.x$data)
    z <- dplyr::mutate(z, value = factor(.data$value, 
                                         levels = c("Unschooled", "Completed elementary", "Completed secondary", "College")))
    z <- dplyr::arrange(z, .data$value)
    
    list(z)
  } else {.x$data})
})

comp <- dplyr::mutate(comp,
                      kable = list(xtable::xtable(dplyr::first(.data$data),
                                                  digits = 3,
                                                  caption = .data$variable,
                                                  align  = c("l", "l", "l", "l", "r"))))
comp <- dplyr::mutate(comp, kable = xtable::print.xtable(dplyr::first(.data$kable),
                                                         floating         = FALSE,
                                                         include.rownames = FALSE,
                                                         comment          = FALSE,
                                                         print.results    = FALSE))
# comp <- dplyr::mutate(comp, kable = paste0("\\captionof{table}{", .data$variable, "}\n", .data$kable))
comp <- dplyr::mutate(comp, kable = paste0("\n\n\\begin{center}", .data$kable, "\\end{center}\n"))
comp <- dplyr::mutate(comp, kable = paste0("\n\n### ", dplyr::first(.data$variable), "\n\n", .data$kable, "\n\n"))

invisible(purrr::map(comp$kable, cat))
```

\clearpage

## Covariate Balance (by vignette treatment)

```{r treatment_balance, include = TRUE}
# balance tables for social vignettes

# compute quantiles in age
age <- quantile(the_data_bak$Age, (1:3) / 4, na.rm=TRUE)

# bin ages by quantile and provide nice labels
the_data_bal <- dplyr::mutate(the_data_bak, age_bin = rowSums(outer(.data$Age, age, `>`)))
the_data_bal <- dplyr::group_by(the_data_bal, .data$age_bin)
the_data_bal <- dplyr::mutate(the_data_bal, age_bin = paste0(min(.data$Age), "-", max(.data$Age)))
the_data_bal <- dplyr::group_by(the_data_bal)

# put them in columns and leave them right there
the_data_bal <- dplyr::select(the_data_bal,
                              .data$Frame,
                              "Age (quartiles)" = .data$age_bin,
                              .data$Education,
                              .data$Gender,
                              .data$Income,
                              .data$Rural,
                              .data$Religiosity,
                              .data$Support,
                              .data$Rating)
the_data_bal <- dplyr::mutate(the_data_bal, dplyr::across(tidyselect:::where(is.factor), as.character))
the_data_bal <- dplyr::mutate(the_data_bal, Frame = stringr::str_remove(.data$Frame, " Frame$"))
the_data_bal <- tidyr::pivot_longer(the_data_bal, -.data$Frame)
the_data_bal <- dplyr::group_by(the_data_bal, .data$name)
the_data_bal <- tidyr::nest(the_data_bal)

# compute tw-way frequency table and add margins
the_data_bal <- dplyr::mutate(the_data_bal, 
                              tab = list(table(dplyr::first(.data$data)$value, dplyr::first(.data$data)$Frame)))
the_data_bal <- dplyr::mutate(the_data_bal,
                              tab = list(dplyr::first(.data$tab) / sum(dplyr::first(.data$tab))))
the_data_bal <- dplyr::mutate(the_data_bal, bat = .data$tab)
the_data_bal <- dplyr::mutate(the_data_bal, tab = list(addmargins(dplyr::first(.data$tab))))

# converting a table to a tibble is a two-step
the_data_bal <- dplyr::mutate(the_data_bal, tab = list(`class<-`(dplyr::first(.data$tab), 
                                                                 setdiff(class(dplyr::first(.data$tab)), "table"))))
the_data_bal <- dplyr::mutate(the_data_bal, tab = list(dplyr::as_tibble(dplyr::first(.data$tab), rownames = "value")))

the_data_lab <- dplyr::mutate(the_data_bal)

# order the levels correctly
the_data_bal <- dplyr::group_modify(the_data_bal, function(.x, .y) {
  dplyr::tibble(tab = if(.y$name == "Income") {
    z <- dplyr::first(.x$tab)
    z <- dplyr::mutate(z, r = as.numeric(stringr::str_extract(.data$value, "\\d+")))
    z <- dplyr::mutate(z, r = ifelse(is.na(.data$r), Inf, .data$r))
    z <- dplyr::arrange(z, .data$r)
    z <- dplyr::mutate(z, value = factor(.data$r, labels = .data$value))
    z <- dplyr::select(z, -.data$r)

    list(z)
  } else {.x$tab})
})
the_data_bal <- dplyr::group_modify(the_data_bal, function(.x, .y) {
  dplyr::tibble(tab = if(.y$name == "Education") {
    z <- dplyr::first(.x$tab)
    z <- dplyr::mutate(z, value = factor(.data$value,
                                         levels = c("Illiterate / no education",
                                                    "Elementary",
                                                    "Intermediate",
                                                    "Secondary",
                                                    "BA degree",
                                                    "MA degree or higher",
                                                    "Sum")))
    z <- dplyr::arrange(z, .data$value)

    list(z)
  } else {.x$tab})
})
the_data_bal <- dplyr::group_modify(the_data_bal, function(.x, .y) {
  dplyr::tibble(tab = if(.y$name == "Religiosity") {
    z <- dplyr::first(.x$tab)
    z <- dplyr::mutate(z, value = factor(.data$value,
                                         levels = c("Devoutly religious",
                                                    "Somewhat religious",
                                                    "Hardly religious",
                                                    "Sum")))
    z <- dplyr::arrange(z, .data$value)

    list(z)
  } else {.x$tab})
})
the_data_bal <- dplyr::group_modify(the_data_bal, function(.x, .y) {
  dplyr::tibble(tab = if(.y$name == "Support") {
    z <- dplyr::first(.x$tab)
    z <- dplyr::mutate(z, value = factor(.data$value,
                                         levels = c("Strongly Agree",
                                                    "Agree",
                                                    "Disagree",
                                                    "Strongly disagree",
                                                    "Sum")))
    z <- dplyr::arrange(z, .data$value)

    list(z)
  } else {.x$tab})
})

the_data_bal <- dplyr::group_by(the_data_bal)
the_data_bal <- dplyr::mutate(the_data_bal,
                              name = factor(.data$name, 
                                            levels = c(setdiff(sort(unique(.data$name)), c("Rating", "Support")), "Rating", "Support")))
the_data_bal <- dplyr::arrange(the_data_bal, .data$name)
the_data_bal <- dplyr::group_by(the_data_bal, .data$name)

# draw up latex tables
the_data_bal <- dplyr::mutate(the_data_bal,
                              kable = list(xtable::xtable(dplyr::first(.data$tab),
                                                          digits = 4)))
the_data_bal <- dplyr::mutate(the_data_bal, kable = xtable::print.xtable(dplyr::first(.data$kable),
                                                         floating         = FALSE,
                                                         include.rownames = FALSE,
                                                         comment          = FALSE,
                                                         print.results    = FALSE))
the_data_bal <- dplyr::mutate(the_data_bal, kable = paste0("\n\n\\begin{center}", .data$kable, "\\end{center}\n"))
the_data_bal <- dplyr::mutate(the_data_bal, kable = paste0("\n\n### ", dplyr::first(.data$name), "\n\n", .data$kable, "\n\n"))

invisible(purrr::map(the_data_bal$kable, cat))
```
